library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("/mnt/c/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive/visitor_logs_analyses")

# READ IN DATA FOR ANALYSIS
clinton_logs_analysis <- fread("clinton_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)

model_formula <- brmsformula(any_visits_leq0 ~ any_visits_leq0_1l + 
                               log(lobbyexp_1l+1) + made_donations_l1tol4 + 
                               made_donations_l1tol4:log(amt_donations_1ltol4+1) +
                               industry_pid + inhouse_lobby +
                               ACC + ADV + AER + AGR + ALC + ANI + APP + ART + AUT + 
                               AVI + BAN + BEV + BNK + BUD + CAW + CDT + CHM + CIV + 
                               COM + CON + CPI + CPT + CSP + DEF + DIS + DOC + ECN + 
                               EDU + ENG + ENV + FAM + FIN + FIR + FOO + FOR + FUE + 
                               GAM + GOV + HCR + HOU + IMM + IND + INS + LAW + LBR + 
                               MAN + MAR + MED + MIA + MMM + MON + NAT + PHA + POS + 
                               REL + RES + RET + ROD + RRR + SCI + SMB + SPO + TAX + 
                               TEC + TOB + TOR + TOU + TRA + TRD + TRU + UNM + URB + 
                               UTI + VET + WAS + WEL +
                               (1|q|yearperiod) + 
                               (1|p|crp_orgname:crp_industry) + 
                               (1|r|crp_industry))

# USE MODEL FORMULA TO CREATE DATA FRAME TO ASSIGN PRIORS
priors_df <-get_prior(model_formula,
                      data = clinton_logs_analysis, 
                      family=bernoulli(link = "logit"))

# ASSIGN DIFFUSE PRIORS FOR BETA COEFFICIENTS, VARYING INTERCEPTS FOR EACH
# GROUPING, AND CORRELATION AMONG EACH GROUPING OF VARYING INTERCEPTS
priors_df$prior <- ifelse(priors_df$class=="b", "normal(0, 10)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="cor", "lkj(1)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="sd"|priors_df$class=="Intercept", 
                          "student_t(3, 0, 10)", priors_df$prior)

# FITTING MODEL--NOTE THAT MODEL USES BOTH WITHIN- AND ACROSS-CHAIN 
# PARALLELIZATION
tableSI6_col1 <- brm(model_formula,
                        data= clinton_logs_analysis, 
                        family=bernoulli(link = "logit"), 
                        prior = priors_df,
                        refresh = 10, 
                        control=list(adapt_delta=0.999999999999999,
                                     max_treedepth = 20),
                        threads = threading(threads = 3, static = TRUE), 
                        backend = "cmdstanr",
                        chains = 4, 
                        cores = 4, 
                        iter = 2000, 
                        seed = 1989)

summary(tableSI6_col1)

# model diagnostics
# any divergent transitions?
sum(nuts_params(tableSI6_col1, pars = "divergent__")$Value)
# any rhats above 1.1?
sort(rhat(tableSI6_col1), decreasing = TRUE)[1:10]
# neff ratio sufficiently large (>0.001 for all parameters?)
sort(neff_ratio(tableSI6_col1), decreasing = FALSE)[1:10]
save(tableSI6_col1, file="tableSI6_col1.RData")